********* Share of physicians applying to a specialty from top 5 schools 

	* Keep application years 

	use "${clean_data}/panel_physicians_clean", clear	
	gunique(npi)
	keep if year==provider_enumeration_year
	gunique(npi)
	keep if !missing(med_sch)
	gunique(npi)
	drop if grd_yr < provider_enumeration_year-4
	gunique(npi)

	* Merge in information about medical school ranking
	merge m:1 med_sch using "${intermediate_data}/medical_school_ranking/medical_school_ranks.dta", keep(master match) keepusing(newsrank20*)
	gunique(npi)

	* Read out the school rank for the year of NPI enumeration
	gen newsrank=.
	forv y = 2005/2017 {		
	replace newsrank=newsrank`y' if `y'==provider_enumeration_year
	}

	* If a school was not ranked in the year of graduation, assign the max rank 
	* out of all observed ranked years 
	egen maxnewsrank=rowmax(newsrank20*)
	replace newsrank=maxnewsrank if missing(newsrank)
	drop newsrank20* maxnewsrank

	keep id personid npi year spec_category_code spec_category_desc ranked_school newsrank 
	gunique(npi)
	gisid id
	gisid npi

	save "${intermediate_data}/spec_choice/specialty_choice_sample.dta", replace
	
********* Descriptive relationship between income and share of physicians from top 5 schools in a specialty
	
	* Select specialty features of interest

	use "${intermediate_data}/specialty_characteristics/spec_category_characteristics_age4055.dta", clear
	drop if spec_category_code==0
	keep spec_category_code spec_category_desc year ptotinc wkh
	gen hourly_income=ptotinc/(52*wkh)
	drop ptotinc wkh 
	ren hourly_income mean_hourly_income
	
	save "${temp}/choice_features.dta", replace
	
	* Save data for binscatter

	use  "${intermediate_data}/spec_choice/specialty_choice_sample.dta", clear
	merge m:1 spec_category_code year using "${temp}/choice_features.dta", keep(master match) nogen 
	gen top5_school=(ranked_school==1 & newsrank<6)
	
	egen count_top5grads=sum(top5_school)
	gen count_anygrads=_N
	
	collapse (mean) top5_school_share=top5_school mean_hourly_income (sum) count_top5grads_byspeccat=top5_school, by(spec_category_code spec_category_desc)
	keep spec_category_code spec_category_desc top5_school_share mean_hourly_income count_top5grads_byspeccat
	
	g N_UR = count_top5grads_byspeccat
	ds spec_category_desc, not
	drbest_docinc `r(varlist)'
	order N_UR, last
	export delimited using "${mypath}/intermediate_csv/choice_correlating_top5.csv", replace dataf delim(tab)  
